import zipfile
# Unpack archive.zip into the current working directory (extractall() with no
# path argument). The context manager closes the archive even if extraction
# fails. Presumably the archive holds the CSV loaded below - confirm.
with zipfile.ZipFile("archive.zip", "r") as zip_ref:
    zip_ref.extractall()
import pandas as pd
# Load the state-wise COVID-19 status table, then preview its size and its
# first three rows.
csv_path = 'Latest Covid-19 India Status.csv'
dataset = pd.read_csv(csv_path, low_memory=False)
print(dataset.shape)  # (rows, columns)
dataset.head(3)
(36, 8)
| | State/UTs | Total Cases | Active | Discharged | Deaths | Active Ratio (%) | Discharge Ratio (%) | Death Ratio (%) |
|---|---|---|---|---|---|---|---|---|
| 0 | Andaman and Nicobar | 7560 | 10 | 7421 | 129 | 0.13 | 98.16 | 1.71 |
| 1 | Andhra Pradesh | 2004590 | 13677 | 1977163 | 13750 | 0.68 | 98.63 | 0.69 |
| 2 | Arunachal Pradesh | 52409 | 1127 | 51023 | 259 | 2.15 | 97.36 | 0.49 |
# Make the column labels identifier-friendly: every space becomes an underscore.
dataset.columns = dataset.columns.map(lambda label: label.replace(' ', '_'))
dataset.head(3)
| | State/UTs | Total_Cases | Active | Discharged | Deaths | Active_Ratio_(%) | Discharge_Ratio_(%) | Death_Ratio_(%) |
|---|---|---|---|---|---|---|---|---|
| 0 | Andaman and Nicobar | 7560 | 10 | 7421 | 129 | 0.13 | 98.16 | 1.71 |
| 1 | Andhra Pradesh | 2004590 | 13677 | 1977163 | 13750 | 0.68 | 98.63 | 0.69 |
| 2 | Arunachal Pradesh | 52409 | 1127 | 51023 | 259 | 2.15 | 97.36 | 0.49 |
# Drop the "/" and "_(%)" fragments from the column labels. All four renames
# are applied in a single call.
dataset = dataset.rename(
    columns={
        "State/UTs": "State_UTs",
        "Active_Ratio_(%)": "Active_Ratio",
        "Discharge_Ratio_(%)": "Discharge_Ratio",
        "Death_Ratio_(%)": "Death_Ratio",
    }
)
dataset.head(3)
| | State_UTs | Total_Cases | Active | Discharged | Deaths | Active_Ratio | Discharge_Ratio | Death_Ratio |
|---|---|---|---|---|---|---|---|---|
| 0 | Andaman and Nicobar | 7560 | 10 | 7421 | 129 | 0.13 | 98.16 | 1.71 |
| 1 | Andhra Pradesh | 2004590 | 13677 | 1977163 | 13750 | 0.68 | 98.63 | 0.69 |
| 2 | Arunachal Pradesh | 52409 | 1127 | 51023 | 259 | 2.15 | 97.36 | 0.49 |
# Number of non-null entries in the state/UT name column.
state_names = dataset["State_UTs"]
state_names.count()
36
# Rank the rows from most to fewest active cases. The sorted frame is only
# displayed; `dataset` itself is not reassigned here.
dataset.sort_values(by="Active", ascending=False)
| | State_UTs | Total_Cases | Active | Discharged | Deaths | Active_Ratio | Discharge_Ratio | Death_Ratio |
|---|---|---|---|---|---|---|---|---|
| 16 | Kerala | 3851984 | 159870 | 3672357 | 19757 | 4.15 | 95.34 | 0.51 |
| 20 | Maharashtra | 6432649 | 53260 | 6243034 | 136355 | 0.83 | 97.05 | 2.12 |
| 15 | Karnataka | 2941026 | 19810 | 2884032 | 37184 | 0.67 | 98.06 | 1.26 |
| 30 | Tamil Nadu | 2604074 | 18603 | 2550710 | 34761 | 0.71 | 97.95 | 1.33 |
| 1 | Andhra Pradesh | 2004590 | 13677 | 1977163 | 13750 | 0.68 | 98.63 | 0.69 |
| 35 | West Bengal | 1544109 | 9217 | 1516509 | 18383 | 0.60 | 98.21 | 1.19 |
| 3 | Assam | 585689 | 8005 | 572084 | 5600 | 1.37 | 97.68 | 0.96 |
| 25 | Odisha | 1002323 | 7461 | 987369 | 7493 | 0.74 | 98.51 | 0.75 |
| 23 | Mizoram | 54057 | 6664 | 47192 | 201 | 12.33 | 87.30 | 0.37 |
| 31 | Telengana | 655732 | 6276 | 645594 | 3862 | 0.96 | 98.45 | 0.59 |
| 21 | Manipur | 111598 | 3610 | 106229 | 1759 | 3.23 | 95.19 | 1.58 |
| 22 | Meghalaya | 74232 | 2820 | 70127 | 1285 | 3.80 | 94.47 | 1.73 |
| 12 | Himachal Pradesh | 212260 | 2054 | 206628 | 3578 | 0.97 | 97.35 | 1.69 |
| 29 | Sikkim | 29477 | 1548 | 27562 | 367 | 5.25 | 93.50 | 1.25 |
| 2 | Arunachal Pradesh | 52409 | 1127 | 51023 | 259 | 2.15 | 97.36 | 0.49 |
| 32 | Tripura | 82384 | 1113 | 80480 | 791 | 1.35 | 97.69 | 0.96 |
| 13 | Jammu and Kashmir | 324420 | 1055 | 318961 | 4404 | 0.33 | 98.32 | 1.36 |
| 9 | Goa | 173357 | 932 | 169239 | 3186 | 0.54 | 97.62 | 1.84 |
| 24 | Nagaland | 29669 | 888 | 28170 | 611 | 2.99 | 94.95 | 2.06 |
| 26 | Puducherry | 123007 | 765 | 120433 | 1809 | 0.62 | 97.91 | 1.47 |
| 11 | Haryana | 770380 | 664 | 760047 | 9669 | 0.09 | 98.66 | 1.26 |
| 6 | Chhattisgarh | 1004230 | 653 | 990022 | 13555 | 0.07 | 98.59 | 1.35 |
| 27 | Punjab | 600342 | 423 | 583564 | 16355 | 0.07 | 97.21 | 2.72 |
| 8 | Delhi | 1437485 | 411 | 1411995 | 25079 | 0.03 | 98.23 | 1.74 |
| 33 | Uttar Pradesh | 1709152 | 352 | 1686006 | 22794 | 0.02 | 98.65 | 1.33 |
| 34 | Uttarakhand | 342786 | 310 | 335099 | 7377 | 0.09 | 97.76 | 2.15 |
| 10 | Gujarat | 825330 | 160 | 815091 | 10079 | 0.02 | 98.76 | 1.22 |
| 14 | Jharkhand | 347755 | 153 | 342470 | 5132 | 0.04 | 98.48 | 1.48 |
| 28 | Rajasthan | 954040 | 128 | 944958 | 8954 | 0.01 | 99.05 | 0.94 |
| 4 | Bihar | 725605 | 102 | 715853 | 9650 | 0.01 | 98.66 | 1.33 |
| 19 | Madhya Pradesh | 792109 | 84 | 781509 | 10516 | 0.01 | 98.66 | 1.33 |
| 17 | Ladakh | 20500 | 55 | 20238 | 207 | 0.27 | 98.72 | 1.01 |
| 5 | Chandigarh | 65069 | 35 | 64222 | 812 | 0.05 | 98.70 | 1.25 |
| 18 | Lakshadweep | 10318 | 28 | 10239 | 51 | 0.27 | 99.23 | 0.49 |
| 0 | Andaman and Nicobar | 7560 | 10 | 7421 | 129 | 0.13 | 98.16 | 1.71 |
| 7 | Dadra and Nagar Haveli and Daman and Diu | 10659 | 4 | 10651 | 4 | 0.04 | 99.92 | 0.04 |
# Rank the rows from highest to lowest death ratio. The sorted frame is only
# displayed; `dataset` itself is not reassigned here.
dataset.sort_values(by="Death_Ratio", ascending=False)
| | State_UTs | Total_Cases | Active | Discharged | Deaths | Active_Ratio | Discharge_Ratio | Death_Ratio |
|---|---|---|---|---|---|---|---|---|
| 27 | Punjab | 600342 | 423 | 583564 | 16355 | 0.07 | 97.21 | 2.72 |
| 34 | Uttarakhand | 342786 | 310 | 335099 | 7377 | 0.09 | 97.76 | 2.15 |
| 20 | Maharashtra | 6432649 | 53260 | 6243034 | 136355 | 0.83 | 97.05 | 2.12 |
| 24 | Nagaland | 29669 | 888 | 28170 | 611 | 2.99 | 94.95 | 2.06 |
| 9 | Goa | 173357 | 932 | 169239 | 3186 | 0.54 | 97.62 | 1.84 |
| 8 | Delhi | 1437485 | 411 | 1411995 | 25079 | 0.03 | 98.23 | 1.74 |
| 22 | Meghalaya | 74232 | 2820 | 70127 | 1285 | 3.80 | 94.47 | 1.73 |
| 0 | Andaman and Nicobar | 7560 | 10 | 7421 | 129 | 0.13 | 98.16 | 1.71 |
| 12 | Himachal Pradesh | 212260 | 2054 | 206628 | 3578 | 0.97 | 97.35 | 1.69 |
| 21 | Manipur | 111598 | 3610 | 106229 | 1759 | 3.23 | 95.19 | 1.58 |
| 14 | Jharkhand | 347755 | 153 | 342470 | 5132 | 0.04 | 98.48 | 1.48 |
| 26 | Puducherry | 123007 | 765 | 120433 | 1809 | 0.62 | 97.91 | 1.47 |
| 13 | Jammu and Kashmir | 324420 | 1055 | 318961 | 4404 | 0.33 | 98.32 | 1.36 |
| 6 | Chhattisgarh | 1004230 | 653 | 990022 | 13555 | 0.07 | 98.59 | 1.35 |
| 30 | Tamil Nadu | 2604074 | 18603 | 2550710 | 34761 | 0.71 | 97.95 | 1.33 |
| 4 | Bihar | 725605 | 102 | 715853 | 9650 | 0.01 | 98.66 | 1.33 |
| 19 | Madhya Pradesh | 792109 | 84 | 781509 | 10516 | 0.01 | 98.66 | 1.33 |
| 33 | Uttar Pradesh | 1709152 | 352 | 1686006 | 22794 | 0.02 | 98.65 | 1.33 |
| 15 | Karnataka | 2941026 | 19810 | 2884032 | 37184 | 0.67 | 98.06 | 1.26 |
| 11 | Haryana | 770380 | 664 | 760047 | 9669 | 0.09 | 98.66 | 1.26 |
| 5 | Chandigarh | 65069 | 35 | 64222 | 812 | 0.05 | 98.70 | 1.25 |
| 29 | Sikkim | 29477 | 1548 | 27562 | 367 | 5.25 | 93.50 | 1.25 |
| 10 | Gujarat | 825330 | 160 | 815091 | 10079 | 0.02 | 98.76 | 1.22 |
| 35 | West Bengal | 1544109 | 9217 | 1516509 | 18383 | 0.60 | 98.21 | 1.19 |
| 17 | Ladakh | 20500 | 55 | 20238 | 207 | 0.27 | 98.72 | 1.01 |
| 32 | Tripura | 82384 | 1113 | 80480 | 791 | 1.35 | 97.69 | 0.96 |
| 3 | Assam | 585689 | 8005 | 572084 | 5600 | 1.37 | 97.68 | 0.96 |
| 28 | Rajasthan | 954040 | 128 | 944958 | 8954 | 0.01 | 99.05 | 0.94 |
| 25 | Odisha | 1002323 | 7461 | 987369 | 7493 | 0.74 | 98.51 | 0.75 |
| 1 | Andhra Pradesh | 2004590 | 13677 | 1977163 | 13750 | 0.68 | 98.63 | 0.69 |
| 31 | Telengana | 655732 | 6276 | 645594 | 3862 | 0.96 | 98.45 | 0.59 |
| 16 | Kerala | 3851984 | 159870 | 3672357 | 19757 | 4.15 | 95.34 | 0.51 |
| 2 | Arunachal Pradesh | 52409 | 1127 | 51023 | 259 | 2.15 | 97.36 | 0.49 |
| 18 | Lakshadweep | 10318 | 28 | 10239 | 51 | 0.27 | 99.23 | 0.49 |
| 23 | Mizoram | 54057 | 6664 | 47192 | 201 | 12.33 | 87.30 | 0.37 |
| 7 | Dadra and Nagar Haveli and Daman and Diu | 10659 | 4 | 10651 | 4 | 0.04 | 99.92 | 0.04 |
# Column-wise totals of cases and deaths over all rows of the table.
case_and_death_counts = dataset[["Total_Cases", "Deaths"]]
case_and_death_counts.sum(axis=0)
Total_Cases 32512366 Deaths 435758 dtype: int64
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
dataset.describe()
| | Total_Cases | Active | Discharged | Deaths | Active_Ratio | Discharge_Ratio | Death_Ratio |
|---|---|---|---|---|---|---|---|
| count | 3.600000e+01 | 36.000000 | 3.600000e+01 | 36.000000 | 36.000000 | 36.000000 | 36.000000 |
| mean | 9.031213e+05 | 8953.527778 | 8.820634e+05 | 12104.388889 | 1.263611 | 97.471667 | 1.265278 |
| std | 1.316180e+06 | 27649.920018 | 1.276942e+06 | 23385.359462 | 2.313456 | 2.239131 | 0.563770 |
| min | 7.560000e+03 | 4.000000 | 7.421000e+03 | 4.000000 | 0.010000 | 87.300000 | 0.040000 |
| 25% | 7.194125e+04 | 158.250000 | 6.865075e+04 | 806.750000 | 0.065000 | 97.357500 | 0.955000 |
| 50% | 4.667220e+05 | 910.000000 | 4.572770e+05 | 5366.000000 | 0.570000 | 98.185000 | 1.295000 |
| 75% | 1.002800e+06 | 6373.000000 | 9.880322e+05 | 13603.750000 | 1.065000 | 98.652500 | 1.607500 |
| max | 6.432649e+06 | 159870.000000 | 6.243034e+06 | 136355.000000 | 12.330000 | 99.920000 | 2.720000 |
# Pairwise correlation (pandas' default method, Pearson) between the numeric
# columns. The text column (State_UTs) is excluded explicitly: pandas >= 2.0
# no longer drops non-numeric columns by default and raises on them, while
# select_dtypes behaves the same on older releases.
dataset.select_dtypes(include="number").corr()
| | Total_Cases | Active | Discharged | Deaths | Active_Ratio | Discharge_Ratio | Death_Ratio |
|---|---|---|---|---|---|---|---|
| Total_Cases | 1.000000 | 0.657405 | 0.999924 | 0.904746 | -0.083121 | 0.062078 | 0.092816 |
| Active | 0.657405 | 1.000000 | 0.649104 | 0.373963 | 0.217516 | -0.181338 | -0.173172 |
| Discharged | 0.999924 | 0.649104 | 1.000000 | 0.906137 | -0.088095 | 0.066903 | 0.094055 |
| Deaths | 0.904746 | 0.373963 | 0.906137 | 1.000000 | -0.125013 | 0.055103 | 0.292854 |
| Active_Ratio | -0.083121 | 0.217516 | -0.088095 | -0.125013 | 1.000000 | -0.969908 | -0.251648 |
| Discharge_Ratio | 0.062078 | -0.181338 | 0.066903 | 0.055103 | -0.969908 | 1.000000 | 0.008449 |
| Death_Ratio | 0.092816 | -0.173172 | 0.094055 | 0.292854 | -0.251648 | 0.008449 | 1.000000 |
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
# Distribution of each headline count, one 20-bin histogram per panel of a
# 2x2 grid. Panels are filled row by row in the order listed below.
histogram_columns = ["Total_Cases", "Deaths", "Active", "Discharged"]
fig = make_subplots(rows=2, cols=2, subplot_titles=histogram_columns)
for position, column in enumerate(histogram_columns):
    # divmod maps 0..3 onto (row, col) pairs (0,0), (0,1), (1,0), (1,1);
    # plotly's subplot grid is 1-based, hence the +1.
    grid_row, grid_col = divmod(position, 2)
    fig.add_trace(
        go.Histogram(x=dataset[column], nbinsx=20, name=column),
        grid_row + 1,
        grid_col + 1,
    )
fig.update_layout(showlegend=False)
fig.show()
# Box plot of each headline count in a 2x2 grid (filled row by row).
# Every trace carries the state/UT names as point text so that the points
# drawn on the plot can be identified on hover, instead of repeating the
# value that is already on the axis.
box_columns = ["Total_Cases", "Deaths", "Active", "Discharged"]
fig = make_subplots(rows=2, cols=2, subplot_titles=box_columns)
for position, column in enumerate(box_columns):
    # divmod maps 0..3 onto (row, col) pairs; plotly's grid is 1-based.
    grid_row, grid_col = divmod(position, 2)
    fig.add_trace(
        go.Box(x=dataset[column], name=column, text=dataset["State_UTs"]),
        grid_row + 1,
        grid_col + 1,
    )
fig.update_layout(showlegend=False)
fig.show()
# Bar chart of each headline count in a 2x2 grid (filled row by row), one
# bar per row of the table. No x values are given, so the bars are placed by
# position; the state/UT name is attached as hover text to every bar so each
# one can be identified.
bar_columns = ["Total_Cases", "Deaths", "Active", "Discharged"]
fig = make_subplots(rows=2, cols=2, subplot_titles=bar_columns)
for position, column in enumerate(bar_columns):
    # divmod maps 0..3 onto (row, col) pairs; plotly's grid is 1-based.
    grid_row, grid_col = divmod(position, 2)
    fig.add_trace(
        go.Bar(y=dataset[column], name=column, hovertext=dataset["State_UTs"]),
        grid_row + 1,
        grid_col + 1,
    )
fig.update_layout(showlegend=False)
fig.show()
# Grouped bars comparing total and discharged cases for every state/UT.
# Both traces use the state/UT names for the x axis and for the hover text.
region_names = dataset["State_UTs"]
fig = go.Figure([
    go.Bar(x=region_names, y=dataset["Total_Cases"], name="Total_Cases", hovertext=region_names),
    go.Bar(x=region_names, y=dataset["Discharged"], name="Discharged", hovertext=region_names),
])
fig.update_layout(barmode="group")
fig.update_layout(title="Total vs Discharged Cases")
fig.show()
# Share of each state/UT in every headline count, one pie per panel of a 2x2
# grid (filled row by row). Pie traces need "domain"-type subplot cells.
# Slices are labelled with the state/UT name so the hover ("percent+label")
# shows which region a slice belongs to.
pie_columns = ["Total_Cases", "Deaths", "Active", "Discharged"]
fig = make_subplots(
    rows=2,
    cols=2,
    subplot_titles=pie_columns,
    specs=[[{"type": "domain"}, {"type": "domain"}],
           [{"type": "domain"}, {"type": "domain"}]],
)
for position, column in enumerate(pie_columns):
    # divmod maps 0..3 onto (row, col) pairs; plotly's grid is 1-based.
    grid_row, grid_col = divmod(position, 2)
    fig.add_trace(
        go.Pie(values=dataset[column], name=column, labels=dataset["State_UTs"]),
        grid_row + 1,
        grid_col + 1,
    )
fig.update_traces(hoverinfo="percent+label")
fig.update_layout(showlegend=False)
fig.update_traces(textposition="inside")
fig.show()
# Scatter of deaths against total cases, one marker per state/UT, with the
# state/UT name attached as marker text.
fig = go.Figure([
    go.Scatter(
        x=dataset["Total_Cases"],
        y=dataset["Deaths"],
        mode="markers",
        text=dataset["State_UTs"],
    )
])
fig.update_layout(title="Deaths vs Total_Cases", xaxis_title="Total_Cases", yaxis_title="Deaths")
fig.show()
# Pull total cases (X) and deaths (Y) out of the frame as 1-D NumPy arrays,
# then display their shapes.
X, Y = (dataset[column].to_numpy() for column in ("Total_Cases", "Deaths"))
X.shape, Y.shape
((36,), (36,))
# X = X.reshape((len(X),1))
# Y = Y.reshape((len(X),1))
# X.shape, Y.shape
from sklearn.linear_model import LinearRegression

# Fit a straight line predicting Y (deaths) from X (total cases). X is 1-D,
# so it is reshaped into a single-column 2-D array before being passed to
# fit(); fit() returns the estimator itself, which is kept as `model`.
model = LinearRegression().fit(X.reshape(-1, 1), Y)
model.intercept_, model.coef_[0]
(-2413.4397885011676, 0.016075170671554388)
# Score of the fitted model (R^2 for LinearRegression) on the same data it
# was fitted on.
model.score(X.reshape(-1, 1), Y)
0.8185661855516211
# Sanity check: 0.904746 is the Total_Cases/Deaths entry of the corr() table;
# its square should agree with the model.score value shown above.
0.904746**2
0.8185653245160001
# Fitted death counts for the same total-case values the model was fitted on.
predictions = model.predict(X.reshape(-1, 1))
predictions.shape
(36,)
# Overlay the model's predictions on the observed points: markers for the
# actual deaths (state/UT name as marker text), a default-mode trace for the
# predicted deaths.
observed_trace = go.Scatter(
    x=dataset["Total_Cases"],
    y=dataset["Deaths"],
    mode="markers",
    text=dataset["State_UTs"],
    name="Actual Deaths",
)
predicted_trace = go.Scatter(
    x=dataset["Total_Cases"],
    y=predictions,
    name="Predicted Deaths",
)
fig = go.Figure([observed_trace, predicted_trace])
fig.update_layout(title="Deaths vs Total_Cases", xaxis_title="Total_Cases", yaxis_title="Deaths")
fig.show()